set more off

// Load the day-tehsil-level source data (per the original author's note).
use "$data/incFinalBFRStehsil", clear

// Aggregate to one row per tehsil-month-year.
//   summed  : casualty / incident / strike counts
//   maximum : every other carried variable (indicators, calendar fields,
//             and whatever $distFE and $tehFE expand to)
local sumVars totCas inj totDead incidents inc strikes
local maxVars mo peace obamareview haqqanipakistan year month nwa quarter ///
	qtryr hyear yhy $distFE $tehFE treat trtXnwa
collapse (sum) `sumVars' (max) `maxVars', ///
	by(monyr dist tehsil distn tehsiln)

// Month-year and half-year indicator sets: tabulate creates one dummy per
// distinct value, named yhy1, yhy2, ... and my1, my2, ...
tabulate yhy, generate(yhy)
tabulate monyr, generate(my)

// Half-year x NWA interaction terms for the event study regression.
// Each interaction is labelled with its period relative to index 10:
//   index 1  = H1 2003 -> "-9"
//   index 10 = H2 2007 -> "0"
//   index 18 = H2 2011 -> "8"
// i.e. label = index - 10, one half-year per step.
label variable yhy10 "0"
forvalues h = 1/18 {
	generate yhy`h'Xnwa = yhy`h'*nwa
	local relPeriod = `h' - 10
	label variable yhy`h'Xnwa "`relPeriod'"
}

// Make the string district name a pure function of the numeric code distn
// (codes 1-7, in the order listed below).
local agencies bajaur khyber kurram mohmand nwa orakzai swa
forvalues d = 1/7 {
	local agencyName : word `d' of `agencies'
	replace dist = "`agencyName'" if distn==`d'
}

// Sample-selection flags for robustness checks: each flag equals 1 for NWA
// plus exactly one comparison district, 0 otherwise.
//   basic -> kurram, basic2 -> bajaur, basic3 -> khyber,
//   basic4 -> mohmand, basic5 -> orakzai
local comparison kurram bajaur khyber mohmand orakzai
local n = 0
foreach agency of local comparison {
	local ++n
	local suffix
	if `n' > 1 local suffix `n'
	generate basic`suffix' = inlist(dist, "nwa", "`agency'")
}

// Linear time trend: monyr minus the monthly date code for January 2008,
// so the trend is 0 in 2008m1 (assumes monyr is a Stata monthly date -- confirm).
local origin = ym(2008,1)
generate timeTrend = monyr - `origin'

// District-specific trends: dist1-dist7 are presumably the district
// indicators carried through the collapse via $distFE -- verify.
forvalues d = 1/7 {
	generate d`d'XtiTr = dist`d'*timeTrend
}

// Tehsil-specific interactions with the time trend, the military-operations
// indicator (mo) and the peace-deal indicator (peace). teh1-teh42 are
// presumably the tehsil indicators supplied by $tehFE -- verify.
forvalues t = 1/42 {
	generate teh`t'XtiTr = teh`t'*timeTrend
	generate teh`t'Xmo = teh`t'*mo
	generate teh`t'Xpeace = teh`t'*peace
}

// District population density as of 1998, looked up by district code:
// the k-th entry of the list belongs to distn == k. Observations whose
// distn is outside 1-7 are left missing.
local density1998 461 212 133 146 77 147 65
generate pdens = .
forvalues d = 1/7 {
	local dens : word `d' of `density1998'
	replace pdens = `dens' if distn==`d'
}

// Preliminary save of the monthly tehsil-level data.
save "$data/incFinalMonthlyTehsil", replace

//process additional covariates data
// Reads the raw bt.csv extract, renames its agency column to dist so it can
// serve as the merge key, and restricts it to the years 2003-2011.
import delimited "$rawData/bt.csv", clear
rename agency dist
// Numeric code for dist, stored in the saved bt file. On the merge below,
// Stata keeps the master's value for any variable present in both datasets.
encode dist, g(distn)
// Same rows kept as "drop if year<2003" + "drop if year>2011":
// inrange() is false for missing year, which the ">2011" drop also removed.
keep if inrange(year, 2003, 2011)
save "$data/bt", replace //save for merge

//merge additional covariates data
use "$data/incFinalMonthlyTehsil", clear
// keep(master match): covariate rows with no counterpart in the tehsil-month
// panel are discarded at the merge. Without this, such using-only rows are
// appended with missing monyr/tehsiln and feed into the district-month
// collapse performed later in this script, producing district rows with a
// missing month. Every row of the panel itself is retained, matched or not.
merge m:1 dist year using "$data/bt", keep(master match) nogenerate
save "$data/incFinalMonthlyTehsil", replace

// District-month version of the data currently in memory.
//   summed  : incidents, totCas, strikes
//   maximum : indicators, pdens, $distFE and the merged covariates
local distSums incidents totCas strikes
local distMax peace mo pdens $distFE treat trtXnwa area population male ///
	female validvotes rejectvotes totalvotes rgisteredvotes education ///
	health agriculture numberofseats schoolsdestroyed schoolsper100k ///
	wheatcrop registration cf percent pop pp
collapse (sum) `distSums' (max) `distMax', by(monyr distn dist)
save "$data/incFinalMonthlyDist", replace

// District-month drone strikes: keep only the merge keys and the strike
// total, renamed so it does not collide with the tehsil-level strikes.
rename strikes strikesDist
keep strikesDist monyr dist
save "$data/strikesMonthlyDist", replace //save for merge


use "$data/incFinalMonthlyTehsil", clear

// Attach the district-month strike totals to every tehsil-month row.
merge m:1 dist monyr using "$data/strikesMonthlyDist"
drop _merge

// Normalised outcomes. For each outcome, the within-district mean and
// standard deviation are computed over observations with treat==0 only
// (tmp stays missing elsewhere), and then applied to all observations:
//   std_<tag>      = (y - mean) / sd
//   <outcome>_prop = (y - mean) / mean
generate tmp = .
local outcomes incidents totCas
local tags inc cas
forvalues k = 1/2 {
	local y : word `k' of `outcomes'
	local tag : word `k' of `tags'

	sort dist
	replace tmp = `y' if treat==0
	by dist: egen mean_`tag' = mean(tmp)
	by dist: egen sd_`tag' = sd(tmp)
	generate std_`tag' = (`y'-mean_`tag')/sd_`tag'
	generate `y'_prop = (`y'-mean_`tag')/mean_`tag'
}
// Remove the scratch variable and the helper means / standard deviations.
drop tmp mean_* sd_*

// Discard rows without a tehsil code.
drop if tehsiln==.

// Final violence dataset.
save "$data/incFinalMonthlyTehsil", replace
